Slides 9 & 10: AR(1) process example, \(\alpha\)=0.5
#---- AR(1) example----# function to generate an AR(1) series. Includes units in functionar_data <-function(n =500, interval =1, units ="seconds", alpha=0.5){# function arguments# n: integer, desired output dataframe length# interval: integer specifying the number between timestamps# units: character specifying the lubridate period function (e.g. "minutes")# alpha: numeric value less than 1 specifying the AR alpha term.# function requires lubridate and tsibble pacman::p_load(lubridate, tsibble)# use match.fun() to find the lubridate function specifying time units input_units <-match.fun(units)# generate time sequence x <-seq(1:n)*interval %>%input_units()# generate e and y e <-rnorm(n) y <-double(n) y[1] <-0for (i in2:n) {y[i] <- alpha*y[i-1] + e[i]}# make a `tsibble` ts_ex_data <-data.frame(time = x, y) %>%as_tsibble(index = time) # explicit returnreturn(ts_ex_data)}# Alternative to function: Manual creation of an AR(1) time series# # n is length of time series# n <- 500# # generate data with alpha = .5# e <- rnorm(n)# y <- double(n)# y[1] <- 0# for (i in 2:n) {y[i] <- 0.5*y[i-1] + e[i]}set.seed(20)# create data ts_ex_data <-ar_data() # plot time seriesggplot() +geom_line(data = ts_ex_data, aes(x = time, y = y)) +labs(title ="AR(1) time series",x ="Time axis",y ="Outcome")
#---- MA(2) process example----# n is length of time seriesn <-500# generate data with rho1 = .0, rho2 = .8e <-rnorm(n)y <-double(n)y[1] <- e[1]y[2] <- e[2] +0.8*e[1]for (i in3:n) {y[i] <- e[i] +0.8*e[i-1] +0.8*e[i-2]}# make a tsibble ts_ex_data <-tibble(y, time =seq(1,n)) %>%as_tsibble(index = time)# plot time seriesggplot() +geom_line(data = ts_ex_data, aes(x = time, y = y)) +labs(title ="MA(2) time series",x ="Time axis",y ="Outcome")
#---- ARMA(1,1) ----set.seed(200)# n is length of time seriesn <-500# generate data with alpha=0.9, rho = .8e <-rnorm(n)y <-double(n)y[1] <- e[1]for (i in2:n) {y[i] <-0.9*y[i-1] + e[i] +0.8*e[i-1] }# make a tsibble ts_ex_data <-tibble(y, time =seq(1,n)) %>%as_tsibble(index = time)# plot time seriesggplot() +geom_line(data = ts_ex_data, aes(x = time, y = y)) +labs(title ="ARMA(1,1) time series",x ="Time axis",y ="Outcome")
The next chunk shows the slider function and a user-written moving function that can present a running 10th percentile.
#---- Time plot with smoother ----# Using slider function with mean; the lab shows it with the quantile# using a new dataset name to keep separate from other examplests_ex_data_slider <- ts_ex_data %>%mutate(slide_20 =slide_dbl(ts_ex_data$y, ~mean(.x, na.rm =TRUE), .before =10, .after =10) )# User-written moving window function: Functionally this is the same as the# slide function; seeing the code allows you to see how it can be constructed.# Code found here:# https://stackoverflow.com/questions/743812/calculating-moving-averagemoving_fn <-function(x, w, fun, side, ...) {# x = vector with numeric data# w = window length# fun = function to apply# side = side to take, (c)entre, (l)eft or (r)ight# ... = parameters passed on to 'fun' y <-numeric(length(x))for (i inseq_len(length(x))) {if (side %in%c("c", "centre", "center")) { ind <-c((i -floor(w /2)):(i +floor(w /2))) } elseif (side %in%c("l", "left")) { ind <-c((i -floor(w) +1):i) } elseif (side %in%c("r", "right")) { ind <-c(i:(i +floor(w) -1)) } else {stop("'side' must be one of 'centre', 'left', 'right'", call. =FALSE) } ind <- ind[ind %in%seq_len(length(x))] y[i] <-fun(x[ind], ...) } y}# and now create any variation you can think of!moving_average <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = mean, side = side, na.rm = na.rm)}moving_sum <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = sum, side = side, na.rm = na.rm)}moving_maximum <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = max, side = side, na.rm = na.rm)}moving_median <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = median, side = side, na.rm = na.rm)}moving_Q1 <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.25)}moving_Q3 <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.75)}moving_10th <-function(x, w =5, side ="centre", na.rm =FALSE) {moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.10)}# Test rolling background:roll10_y <-moving_10th(ts_ex_data_slider$y, w =10, side ="c")# Show the results on a plotts_ex_data_slider %>%bind_cols(roll10_y) %>%ggplot(aes(time, y)) +geom_point(size =1, alpha = .5, color ="grey") +geom_line(aes(time, slide_20), color="red", lty=1) +geom_line(aes(time, roll10_y), color="blue", lty=2) +labs(title ="ARMA(1,1) time series\n slider & rolling 10th %tile",x ="Time axis",y ="Outcome")
New names:
• `` -> `...4`
Loess and kernel smoother examples
Examples in this section follow the nice exposition by Rafael A. Irizarry in his Introduction to Data Science on bin smoothing and loess for how kernel smoothers and loess work. It is worth taking a look at his animated graphics that show the repeated estimation across the length of the data.
Slide 19: 4 example plots with different kernel smoothers
#---- bin and kernel smoothing ----# kernel smoothing, box and normal kernel# note the Gaussian is a smoother function, even for the same spanspan <-10fit_ru <-with(ts_ex_data, ksmooth(time, y, kernel ="box", bandwidth = span))fit_rw <-with(ts_ex_data, ksmooth(time, y, kernel ="normal", bandwidth = span))span <-50fit_su <-with(ts_ex_data, ksmooth(time, y, kernel ="box", bandwidth = span))fit_sw <-with(ts_ex_data, ksmooth(time, y, kernel ="normal", bandwidth = span))# 4 separate plots for now:# Rough, unweightedts_ex_data %>%mutate(smooth = fit_ru$y) %>%ggplot(aes(time, y)) +geom_point(size =1, alpha = .5, color ="grey") +geom_line(aes(time, smooth), color="red") +labs(title ="ARMA(1,1) time series\n box smooth (span 10)",x ="Time axis",y ="Outcome")
Loess is a locally weighted regression model fit over a small span. The default in loewss is a 2nd degree polynomial, rather than a linear regression line.
#---- loess example ----# first convert to a tibblets_ex_data <- ts_ex_data %>%tibble()# choose a span based on the length of the data# default span is .75; we are using .1 (50/500)span <- .25# consider two different loess smoothers, linear and polynomialfit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)# the default is degree = 2fit2 <-loess(y ~ time, span = span, data=ts_ex_data)ts_ex_data %>%mutate(smooth1 = fit1$fitted,smooth2 = fit2$fitted) %>%ggplot(aes(time, y)) +geom_point(size =1, alpha = .5, color ="grey") +geom_line(aes(time, smooth1), color="red", lty =1) +geom_line(aes(time, smooth2), color="blue", lty =2) +labs(title ="ARMA(1,1) time series\n with loess smooth (span .25)",x ="Time axis",y ="Outcome")
# short spanspan <- .1# consider two different loess smoothers, linear and polynomialfit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)# the default is degree = 2fit2 <-loess(y ~ time, span = span, data=ts_ex_data)ts_ex_data %>%mutate(smooth1 = fit1$fitted,smooth2 = fit2$fitted) %>%ggplot(aes(time, y)) +geom_point(size =1, alpha = .5, color ="grey") +geom_line(aes(time, smooth1), color="red", lty =1) +geom_line(aes(time, smooth2), color="blue", lty =2) +labs(title ="ARMA(1,1) time series\n with loess smooth (span .1)",x ="Time axis",y ="Outcome")
# long spanspan <- .75# consider two different loess smoothers, linear and polynomialfit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)# the default is degree = 2fit2 <-loess(y ~ time, span = span, data=ts_ex_data)ts_ex_data %>%mutate(smooth1 = fit1$fitted,smooth2 = fit2$fitted) %>%ggplot(aes(time, y)) +geom_point(size =1, alpha = .5, color ="grey") +geom_line(aes(time, smooth1), color="red", lty =1) +geom_line(aes(time, smooth2), color="blue", lty =2) +labs(title ="ARMA(1,1) time series\n with loess smooth (span .75)",x ="Time axis",y ="Outcome")
Slide 22: Smooth vs. rough decomposition
#---- smooth vs. rough ----# after minimal smoothing -- long spanspan <- .75fit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)ts_ex_data %>%mutate(smooth1 = fit1$fitted,resid1 = y - fit1$fitted) %>%ggplot(aes(time, resid1)) +geom_point(size = .75, alpha = .5, color ="black") +geom_abline(intercept=0, slope =0, color="red", lty =1) +labs(title ="ARMA(1,1) residual time series\n light smoothing removed",x ="Time axis",y ="Residual Outcome")
# after more aggressive smoothing -- wigglier curvespan <- .1fit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)ts_ex_data %>%mutate(smooth1 = fit1$fitted,resid1 = y - fit1$fitted) %>%ggplot(aes(time, resid1)) +geom_point(size = .75, alpha = .5, color ="black") +geom_abline(intercept=0, slope =0, color="red", lty =1) +labs(title ="ARMA(1,1) residual time series\n wiggly smoother removed",x ="Time axis",y ="Residual Outcome")
Slide 23: Differencing
#---- Differencing ----# first difference:ts_ex_diff <- ts_ex_data %>%mutate(diff = y -lag(y, default =first(y), order_by = time))#ts_ex_diff# plot the first differences and check how smooth they are:span =0.75fit1 <-loess(y ~ time, degree=1, span = span, data=ts_ex_data)fit2 <-loess(diff ~ time, degree=1, span = span, data=ts_ex_diff)ts_ex_diff %>%mutate(smooth1 = fit2$fitted,resid1 = y - fit2$fitted) %>%ggplot(aes(time, diff)) +geom_point(size = .75, alpha = .5, color ="black") +geom_line(aes(time, smooth1), color="red", lty =1) +labs(title ="ARMA(1,1) first differences\n w/ loess smooth (span .75)",x ="Time axis",y ="Residual First Difference Outcome")
#compare with the original datats_ex_data %>%mutate(smooth1 = fit1$fitted,resid1 = y - fit1$fitted) %>%ggplot(aes(time, y)) +geom_point(size = .75, alpha = .5, color ="black") +geom_line(aes(time, smooth1), color="red", lty =1) +labs(title ="ARMA(1,1) time series\n w/ loess smooth (span .75)",x ="Time axis",y ="Outcome y")